Analyze Station Locations in Vancouver

In [2]:
import requests
import pandas as pd
import numpy as np
import zipfile

#creat map
import folium
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
In [3]:
# load 2017 SkyTrain Station Rank and Average Weekday Boarding Data
# table I in 2017_TSPR_Summary.pdf, manually grab the data into the csv file
StationBoardings = pd.read_csv("ProjectData/2017SkyTrainStationRank.csv")
StationBoardings.head()
Out[3]:
Rank Station Line AWBoardings
0 1 Waterfront Canada and Expo Lines 37500
1 2 Commercial-Broadway Expo and Millennium Lines 24900
2 3 Burrard Expo Line 23000
3 4 Granville Expo Line 20600
4 5 Metrotown Expo Line 19900
In [4]:
# the GTFS zipfile is loaded into GTFS: a dictionary of dataframes
gtfs_zip = zipfile.ZipFile("ProjectData/google_transit.zip")
GTFS = {}
for table in gtfs_zip.namelist():
    table_name = table.split(".")[0]
    GTFS[table_name] = pd.read_csv(gtfs_zip.open(table))
print(GTFS.keys())
dict_keys(['agency', 'calendar', 'calendar_dates', 'feed_info', 'routes', 'shapes', 'stops', 'stop_times', 'transfers', 'trips', 'cardinal_directions_exceptions', 'direction_names_exceptions', 'stop_order_exceptions'])
In [54]:
# 8916 stops
# each skytrain platform has a parent_station
print(GTFS["stops"].loc[GTFS["stops"]["parent_station"]==99917])
print("---------------------------------------")
print(GTFS["stops"].loc[GTFS["stops"]["stop_id"]==99917])
      stop_id  stop_code                               stop_name  stop_desc  \
7854     8044    57969.0  Commercial-Broadway Station Platform 4        NaN   
7886     8073    57998.0  Commercial-Broadway Station Platform 3        NaN   
8183     8754    60822.0  Commercial-Broadway Station Platform 1        NaN   
8192     8763    60823.0  Commercial-Broadway Station Platform 2        NaN   

       stop_lat    stop_lon zone_id  stop_url  location_type  parent_station  
7854  49.262317 -123.069179    ZN 1       NaN              0         99917.0  
7886  49.262311 -123.069077    ZN 1       NaN              0         99917.0  
8183  49.262980 -123.068426    ZN 1       NaN              0         99917.0  
8192  49.262912 -123.068491    ZN 1       NaN              0         99917.0  
---------------------------------------
      stop_id  stop_code                    stop_name  stop_desc  stop_lat  \
8874    99917        NaN  Commercial-Broadway Station        NaN  49.26267   

        stop_lon zone_id  stop_url  location_type  parent_station  
8874 -123.068765    ZN 1       NaN              1             NaN  
In [8]:
# we are going to generate two station lists
# 1. existing rapid transit stations (parent_station): skytrain, seabus and west cost express
# 2. canadidate stations: exiting bus stops

RapidTransitStop_list = GTFS["stops"]["parent_station"].dropna().unique().tolist()
print("%d rapid transit stations"%len(RapidTransitStop_list))
RapidTransitStop_df = GTFS["stops"].loc[GTFS["stops"]["stop_id"].isin(RapidTransitStop_list)]
print(RapidTransitStop_df.head())
print(RapidTransitStop_df.shape)

#bus stops are rows where parent_station is NaN and stop_id not in the RapidTransitStop_list
BusStop_filter = ~GTFS["stops"]["stop_id"].isin(RapidTransitStop_list)
BusStop_filter = BusStop_filter & (GTFS["stops"]["parent_station"].isnull())
BusStop_df = GTFS["stops"].loc[BusStop_filter]
print("%d bus stops"%len(BusStop_df))
print(BusStop_df.head())
print(BusStop_df.shape)
59 rapid transit stations
      stop_id  stop_code                   stop_name  stop_desc   stop_lat  \
1466    12034        NaN          Waterfront Station        NaN  49.285687   
8858    99901        NaN         YVR-Airport Station        NaN  49.194174   
8859    99902        NaN   Sea Island Centre Station        NaN  49.192986   
8860    99903        NaN           Templeton Station        NaN  49.196688   
8861    99904        NaN  Richmond-Brighouse Station        NaN  49.167943   

        stop_lon zone_id  stop_url  location_type  parent_station  
1466 -123.111773    ZN 1       NaN              1             NaN  
8858 -123.178269    ZN 2       NaN              1             NaN  
8859 -123.157887    ZN 2       NaN              1             NaN  
8860 -123.146337    ZN 2       NaN              1             NaN  
8861 -123.136372    ZN 2       NaN              1             NaN  
(59, 10)
8726 bus stops
   stop_id  stop_code                          stop_name  stop_desc  \
0    10000    59326.0   Northbound No. 5 Rd @ McNeely Dr        NaN   
1    10001    59324.0  Northbound No. 5 Rd @ Woodhead Rd        NaN   
2    10002    59323.0    Southbound No. 5 Rd @ Cambie Rd        NaN   
3    10003    59325.0  Southbound No. 5 Rd @ Woodhead Rd        NaN   
4    10004    59327.0    Eastbound McNeely Dr @ No. 5 Rd        NaN   

    stop_lat    stop_lon zone_id  stop_url  location_type  parent_station  
0  49.179962 -123.091490  BUS ZN       NaN              0             NaN  
1  49.182670 -123.091448  BUS ZN       NaN              0             NaN  
2  49.184252 -123.091627  BUS ZN       NaN              0             NaN  
3  49.182051 -123.091659  BUS ZN       NaN              0             NaN  
4  49.179586 -123.091105  BUS ZN       NaN              0             NaN  
(8726, 10)
In [77]:
# show rapid transit station locations on map

# map centre lat,long
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
address = 'New Westminster, BC'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of rapid transit stations using latitude and longitude values
Station_Map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add bus stop markers to map
for lat, lng, stopid, stopname in zip(BusStop_df['stop_lat'], BusStop_df['stop_lon'], BusStop_df['stop_id'], BusStop_df['stop_name']):
    label = '{}, {}'.format(stopid, stopname)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        popup=label,
        color='#9b99c4',
        fill=True,
        fill_color='#9b99c4',
        fill_opacity=0.1,
        parse_html=False).add_to(Station_Map)  

# add rapid transit markers to map
for lat, lng, stopid, stopname in zip(RapidTransitStop_df['stop_lat'], RapidTransitStop_df['stop_lon'], RapidTransitStop_df['stop_id'], RapidTransitStop_df['stop_name']):
    label = '{}, {}'.format(stopid, stopname)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.9,
        parse_html=False).add_to(Station_Map)  
    
Station_Map
Out[77]:
In [ ]:
## Foursquare API - Venue Data

#get nearby venue - utility function
CLIENT_ID = 'RBSGZRHE5H03FMTM43053AYPDNC2TJNJO0W50FMFWNVJSHQT' # your Foursquare ID
CLIENT_SECRET = 'FIDHWOGNIKYG1KYWVAGIIH3B2WRO32Y4OLQOZVOUOUBZK2NG' # your Foursquare Secret
VERSION = '20171101' # Foursquare API version
radius = 400 #m
LIMIT = 100 #no more than 100 venues per neighbourhood

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        try:
            response = requests.get(url).json()["response"]
            results = response['groups'][0]['items']
        except:
            continue # to the next record, no venue around
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['id'],
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['stop_id', 
                  'stop_lat', 
                  'stop_lon', 
                  'VenueID',
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
In [ ]:
rapidtransit_venues = getNearbyVenues(names=RapidTransitStop_df['stop_id'],
                                     latitudes=RapidTransitStop_df['stop_lat'],
                                     longitudes=RapidTransitStop_df['stop_lon']
                                  )
rapidtransit_venues.to_csv("ProjectData/API_rapidtransit_venues.csv",index=False)
In [9]:
# input - df: a Dataframe, chunkSize: the chunk size
# output - a list of DataFrame
# purpose - splits the DataFrame into smaller of max size chunkSize (last is smaller)
def splitDataFrameIntoSmaller(df, chunkSize = 100): 
    listOfDf = list()
    numberChunks = len(df) // chunkSize + 1
    for i in range(numberChunks):
        listOfDf.append(df[i*chunkSize:(i+1)*chunkSize])
    return listOfDf
In [10]:
#split the bus stop dataframe into smaller chunks to query Foursquare API data 
BusStop_df_sections = splitDataFrameIntoSmaller(BusStop_df)
print("%d sections of (max) 100 stops"%len(BusStop_df_sections))
88 sections of (max) 100 stops
In [ ]:
# range 1 to 88 
for section in range(-1):
    rapidtransit_venues = getNearbyVenues(names=BusStop_df_sections[section]['stop_id'],
                                         latitudes=BusStop_df_sections[section]['stop_lat'],
                                         longitudes=BusStop_df_sections[section]['stop_lon']
                                      )
    rapidtransit_venues.to_csv("ProjectData/API_bus_venues_section%d.csv"%(section+1),index=False)
In [ ]:
# check Foursquare API limit
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    40.794123, 
    -73.953961, 
    500, 
    1)
requests.get(url).json()
# note: code 429 means you are over the daily call quota limit
# reset midnight UTC (5pm Vancouver Time)
In [32]:
# load all the venues from API calls
import os
directory = os.fsencode("ProjectData")
venue_df_list = []
for file in os.listdir(directory):
     filename = os.fsdecode(file)
     if "API" in filename and filename.endswith(".csv"): 
        venue_df_list.append(pd.read_csv("ProjectData/"+filename))
venue_df = pd.concat(venue_df_list, axis=0)

# check the venue categories
print('{} stops in the venue dataframe.'.format(len(venue_df['stop_id'].unique())))
print('There are {} uniques categories.'.format(len(venue_df['Venue Category'].unique())))
print('There are {} uniques venues.'.format(len(venue_df['VenueID'].unique())))
print(venue_df.head())
8340 stops in the venue dataframe.
There are 451 uniques categories.
There are 10861 uniques venues.
   stop_id   stop_lat    stop_lon                   VenueID  \
0     2660  49.209736 -122.994325  4bae7801f964a5203db63be3   
1     2660  49.209736 -122.994325  551c1d13498e6b30fa879f25   
2     2660  49.209736 -122.994325  4bd8948011dcc9280feef633   
3     2660  49.209736 -122.994325  4bf997345ec320a1bf848ad3   
4     2660  49.209736 -122.994325  4ba545dff964a5208df438e3   

                                  Venue  Venue Latitude  Venue Longitude  \
0                Gardenworks Mandeville       49.206704      -122.998270   
1  Riverway Golf Course & Driving Range       49.207437      -122.996536   
2                     Sun Tai Sang Farm       49.206914      -122.994924   
3                     Wing Wong Nursery       49.206530      -122.993784   
4                          Garden Works       49.206706      -122.998498   

   Venue Category  
0   Garden Center  
1     Golf Course  
2  Farmers Market  
3   Garden Center  
4   Garden Center  
In [33]:
# check the venues around a station (specify stop_id) on a map
stopid = 99930

latitude = GTFS["stops"].loc[GTFS["stops"]["stop_id"]==stopid]["stop_lat"].tolist()[0]
longitude = GTFS["stops"].loc[GTFS["stops"]["stop_id"]==stopid]["stop_lon"].tolist()[0]
stopname = GTFS["stops"].loc[GTFS["stops"]["stop_id"]==stopid]["stop_name"].tolist()[0]

station_venues = venue_df.loc[venue_df["stop_id"]==stopid]
print("%d venue(s) near %s"%(len(station_venues),stopname))

# create map of rapid transit stations using latitude and longitude values
StationVenue_Map = folium.Map(location=[latitude, longitude], tiles='cartodbpositron', zoom_start=16)

# add rapid transit markers to map
for lat, lng, venuename, venuetype in zip(station_venues['Venue Latitude'], station_venues['Venue Longitude'], station_venues['Venue'], station_venues['Venue Category']):
    label = '{}, {}'.format(venuename, venuetype)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='#d17389',
        fill=True,
        fill_color='#d17389',
        fill_opacity=0.9,
        parse_html=False).add_to(StationVenue_Map)  
    
# add the stop marker to map
label = '{}, {}'.format(stopid, stopname)
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
        [latitude, longitude],
        radius=10,
        popup=label,
        color='#8a1631',
        fill=True,
        fill_color='#8a1631',
        fill_opacity=0.9,
        parse_html=False).add_to(StationVenue_Map)  

StationVenue_Map
21 venue(s) near Coquitlam Central Station
Out[33]:
In [211]:
#print(venue_df['Venue Category'].unique())
venue_type = venue_df[['Venue Category']].drop_duplicates()
venue_type["Purpose"] = "Other"
venue_type_lookup = [["Restaurant","Restaurant"], #search keyword, type label 
                     ["Bar","Restaurant"],
                     ["Pizza","Restaurant"],
                     ["Sandwich","Restaurant"],
                     ["Cafe","Restaurant"],
                     ["Café","Restaurant"],
                     ["Diner","Restaurant"],
                     ["Burger","Restaurant"],
                     ["Deli","Restaurant"],
                     ["Breakfast","Restaurant"],
                     ["Food","Restaurant"],
                     ["Joint","Restaurant"],
                     ["Salad","Restaurant"],
                     ["Snack","Restaurant"],
                     ["Taco","Restaurant"],
                     ["Soup","Restaurant"],
                     ["Fried Chicken","Restaurant"],
                     ["Store","Store"],
                     ["Shop","Store"],
                     ["Supermarket","Store"],
                     ["Gastropub","Store"],
                     ["Trail","Park"],
                     ["Public Art","Park"],
                     ["Park","Park"],
                     ["Bank","Bank"],
                     ["Chiropractor","HealthCare"],
                     ["Physical Therapist","HealthCare"],
                     ["Pharmacy","HealthCare"],
                     ["Message","HealthCare"],
                     ["Dispensary","HealthCare"],
                     ["Hotel","Hotel"],
                     ["Motel","Hotel"],
                     ["Inn","Hotel"],
                     ["School","School"],
                     ["Office","Office"],
                     ["Tech Startup","Office"],
                     ["Lawyer","Office"],
                     ["Gym","Recreational"],
                     ["Yoga","Recreational"],
                     ["Dance","Recreational"],
                     ["Golf","Recreational"],
                     ["Laser Tag","Recreational"],
                     ["Entertainment","Recreational"],
                     ["Rock Club","Recreational"],
                     ["Photography Studio","Recreational"],
                     ["Stadium","Recreational"],
                     ["Bowling Alley","Recreational"],
                     ["Club","Recreational"],
                     ["Bus","Transportation"],
                     ["Station","Transportation"],
                     ["Airport","Transportation"],
                     ["Travel Lounge","Transportation"],
                     ["Heliport","Transportation"],
                     ["Transportation","Transportation"]]
for keyword, label in venue_type_lookup:
    venue_type["Purpose"] = np.where(venue_type["Venue Category"].str.contains(keyword),label,venue_type["Purpose"])
venue_type[venue_type["Purpose"]=="Other"]["Venue Category"].tolist()
Out[211]:
['Other Great Outdoors',
 'Moving Target',
 'Rental Service',
 'Brewery',
 'Performing Arts Venue',
 'Home Service',
 'Field',
 'Pool',
 'Market',
 'Bakery',
 'Tea Room',
 'Theater',
 'Butcher',
 'Fish Market',
 'Beach',
 'Bistro',
 'Boat or Ferry',
 'Harbor / Marina',
 'Boat Rental',
 'Hostel',
 'Bookstore',
 'Farmers Market',
 'Massage Studio',
 'Pub',
 'Nightclub',
 'Noodle House',
 'Sculpture Garden',
 'Shoe Repair',
 'Museum',
 'Art Gallery',
 'Movie Theater',
 'Dog Run',
 'Spa',
 'Track',
 'Baseball Field',
 'Auto Workshop',
 'River',
 'Lake',
 'Rental Car Location',
 'Ski Area',
 'Auto Garage',
 'History Museum',
 'Playground',
 'Concert Hall',
 'Planetarium',
 'Lounge',
 'Tennis Court',
 'Rest Area',
 'Basketball Court',
 'Volleyball Court',
 'Burrito Place',
 'Pier',
 'Event Service',
 'Soccer Field',
 'Neighborhood',
 'Scenic Lookout',
 'Nature Preserve',
 'Construction & Landscaping',
 'Steakhouse',
 'Plaza',
 'Poke Place',
 'Intersection',
 'Hockey Rink',
 'Flea Market',
 'Garden Center',
 'Boutique',
 'Casino',
 'Building',
 'Garden',
 'Platform',
 'Music Venue',
 'Speakeasy',
 'Community Center',
 'Science Museum',
 'Irish Pub',
 'Nail Salon',
 'Laundry Service',
 'Outdoor Sculpture',
 'Library',
 'Event Space',
 'Financial or Legal Service',
 'Athletics & Sports',
 'College Rec Center',
 'Skating Rink',
 'Night Market',
 'Botanical Garden',
 'Health & Beauty Service',
 'Storage Facility',
 'Used Bookstore',
 'Auto Dealership',
 'College Academic Building',
 'Fair',
 'Pool Hall',
 'Circus',
 'Hockey Arena',
 'Medical Center',
 'Monument / Landmark',
 'Arcade',
 'Hockey Field',
 'Creperie',
 'Tree',
 'Locksmith',
 'Bike Rental / Bike Share',
 'Outlet Mall',
 'Multiplex',
 'Residential Building (Apartment / Condo)',
 'Tanning Salon',
 'Aquarium',
 'Waterfront',
 'Outdoors & Recreation',
 'Historic Site',
 'Plane',
 'Distribution Center',
 'Dry Cleaner']
In [214]:
venue_type_count = venue_df[['VenueID', 'Venue Category']].drop_duplicates()
venue_type_count = venue_type_count.groupby(["Venue Category"]).count().reset_index().sort_values(by=["VenueID"],ascending=False)
venue_type_count = pd.merge(venue_type_count,venue_type,on="Venue Category")
print(venue_type_count.head(30))
            Venue Category  VenueID         Purpose
0              Coffee Shop      296           Store
1         Sushi Restaurant      131      Restaurant
2                     Park      129            Park
3       Chinese Restaurant      128      Restaurant
4                     Café      110      Restaurant
5           Sandwich Place      109      Restaurant
6            Grocery Store      108           Store
7     Fast Food Restaurant      107      Restaurant
8              Pizza Place       97      Restaurant
9      Japanese Restaurant       97      Restaurant
10                Bus Stop       89  Transportation
11   Vietnamese Restaurant       77      Restaurant
12                  Bakery       75           Other
13              Restaurant       74      Restaurant
14                    Bank       69            Bank
15                Pharmacy       67      HealthCare
16                   Hotel       63           Hotel
17            Burger Joint       57      Restaurant
18          Clothing Store       57           Store
19          Ice Cream Shop       56           Store
20                     Gym       53    Recreational
21            Liquor Store       52           Store
22      Italian Restaurant       51      Restaurant
23                     Pub       47           Other
24          Breakfast Spot       46      Restaurant
25        Asian Restaurant       45      Restaurant
26       Indian Restaurant       44      Restaurant
27  Furniture / Home Store       43           Store
28       Convenience Store       41           Store
29           Shopping Mall       39           Store
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: